home *** CD-ROM | disk | FTP | other *** search
/ AmigActive 10 / AACD 10.iso / AACD / Games / WarpQuake / Src / d_polysa.s < prev    next >
Text File  |  2000-05-22  |  46KB  |  1,745 lines

  1. /*
  2. Copyright (C) 1996-1997 Id Software, Inc.
  3.  
  4. This program is free software; you can redistribute it and/or
  5. modify it under the terms of the GNU General Public License
  6. as published by the Free Software Foundation; either version 2
  7. of the License, or (at your option) any later version.
  8.  
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  
  12.  
  13. See the GNU General Public License for more details.
  14.  
  15. You should have received a copy of the GNU General Public License
  16. along with this program; if not, write to the Free Software
  17. Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
  18.  
  19. */
  20. //
  21. // d_polysa.s
  22. // x86 assembly-language polygon model drawing code
  23. //
  24.  
  25. #include "asm_i386.h"
  26. #include "quakeasm.h"
  27. #include "asm_draw.h"
  28. #include "d_ifacea.h"
  29.  
  30. #if    id386
  31.  
  32. // !!! if this is changed, it must be changed in d_polyse.c too !!!
      // NOTE: the expansion below is not parenthesized, so DPS_MAXSPANS is only
      // safe where a bare "MAXHEIGHT+1" is acceptable (for example, not directly
      // as an operand of a multiplication).
  33. #define DPS_MAXSPANS            MAXHEIGHT+1    
  34.                                     // 1 extra for spanpackage that marks end
  35.  
      // The general SPAN_SIZE formula is kept on the next line for reference but
      // is disabled; the active definition hard-codes the numbers instead.
      // NOTE(review): presumably 1024 stands in for MAXHEIGHT and 32 for
      // spanpackage_t_size -- confirm, and keep them in sync if either changes.
      // The active expansion is also not parenthesized as a whole.
  36. //#define    SPAN_SIZE    (((DPS_MAXSPANS + 1 + ((CACHE_SIZE - 1) / spanpackage_t_size)) + 1) * spanpackage_t_size)
  37. #define SPAN_SIZE (1024+1+1+1)*32
  38.  
  39.  
  40.     .data
  41.  
  42.     .align    4
      // Single-precision scratch cells, all initialized to zero.
      // D_PolysetCalcGradients stores into p10_minus_p20 and p01_minus_p21
      // (fstps) and reads them back as memory operands (fmuls).
      // temp0, temp1 and Ltemp are not referenced by that routine.
      // NOTE(review): their users lie outside the reviewed portion of the file.
  43. p10_minus_p20:    .single        0
  44. p01_minus_p21:    .single        0
  45. temp0:            .single        0
  46. temp1:            .single        0
  47. Ltemp:            .single        0
  48.  
      // Table of eight 32-bit code addresses, ordered LDraw8 down to LDraw1.
      // NOTE(review): the LDraw labels and the code that indexes this table are
      // not in the reviewed portion -- presumably a jump table; confirm at the
      // use site.
  49. aff8entryvec_table:    .long    LDraw8, LDraw7, LDraw6, LDraw5
  50.                 .long    LDraw4, LDraw3, LDraw2, LDraw1
  51.  
      // One 32-bit cell, initially zero.  Not referenced by
      // D_PolysetCalcGradients (which stores to C(r_zistepx) instead).
  52. lzistepx:        .long    0
  53.  
  54.  
  55.     .text
  56.  
      // External routines declared for this file; the declarations are skipped
      // when NeXT is defined.  Neither symbol is referenced in the portion of
      // the file reviewed here.
  57. #ifndef NeXT
  58.     .extern C(D_PolysetSetEdgeTable)
  59.     .extern C(D_RasterizeAliasPolySmooth)
  60. #endif
  61.  
  62. //----------------------------------------------------------------------
  63. // affine triangle gradient calculation code
      //
      // D_PolysetCalcGradients
      //
      // Computes the x and y step values for the current triangle from the
      // integer vertex arrays C(r_p0), C(r_p1) and C(r_p2) and stores them in:
      //   C(r_lstepx)/C(r_lstepy)     from element [4] (byte offset 16); stored
      //                               while the ceil_cw control word is loaded
      //   C(r_sstepx)/C(r_sstepy)     from element [2] (byte offset 8)
      //   C(r_tstepx)/C(r_tstepy)     from element [3] (byte offset 12)
      //   C(r_zistepx)/C(r_zistepy)   from element [5] (byte offset 20)
      // It then derives C(a_sstepxfrac), C(a_tstepxfrac) and C(a_ststepxwhole)
      // with integer shifts and one multiply.
      //
      // In:    4(%esp)      the only stack argument read (see skinwidth below)
      //        C(d_xdenom)  single-precision value used as xstepdenominv
      // Out:   the globals listed above; the file-local cells p10_minus_p20
      //        and p01_minus_p21 are overwritten as scratch
      // Clobb: %eax, %ecx, %edx, flags; the x87 control word is left as
      //        single_cw
      // FPU:   net stack effect is zero, but the peak depth is eight entries,
      //        so all eight x87 registers must be free on entry
      // Exit:  plain ret; nothing is pushed and the argument is not popped
      //
      // Reading the stack comments: each trailing comment lists the x87 stack
      // after the instruction has executed, with st(0) leftmost.  The comments
      // on fsubp/fsubrp rely on the AT&T-syntax behaviour in which
      // "fsubp %st(0),%st(i)" leaves st(0) - st(i) in st(i) and
      // "fsubrp %st(0),%st(i)" leaves st(i) - st(0) in st(i).
      // NOTE(review): that operand convention is assembler-specific -- confirm
      // it before porting to another assembler or to Intel syntax.
  64. //----------------------------------------------------------------------
  65.  
      // Offset from %esp of the stack argument used by imull at the end of the
      // routine.  Presumably 4 = return address and 0 = bytes pushed by this
      // routine (it pushes nothing).
  66. #define skinwidth    4+0
  67.  
  68. .globl C(D_PolysetCalcGradients)
  69. C(D_PolysetCalcGradients):
  70.  
  71. //    p00_minus_p20 = r_p0[0] - r_p2[0];
  72. //    p01_minus_p21 = r_p0[1] - r_p2[1];
  73. //    p10_minus_p20 = r_p1[0] - r_p2[0];
  74. //    p11_minus_p21 = r_p1[1] - r_p2[1];
  75. //
  76. //    xstepdenominv = 1.0 / (p10_minus_p20 * p01_minus_p21 -
  77. //                 p00_minus_p20 * p11_minus_p21);
  78. //
  79. //    ystepdenominv = -xstepdenominv;
  80.  
  81.     fildl    C(r_p0)+0        // r_p0[0]
  82.     fildl    C(r_p2)+0        // r_p2[0] | r_p0[0]
  83.     fildl    C(r_p0)+4        // r_p0[1] | r_p2[0] | r_p0[0]
  84.     fildl    C(r_p2)+4        // r_p2[1] | r_p0[1] | r_p2[0] | r_p0[0]
  85.     fildl    C(r_p1)+0        // r_p1[0] | r_p2[1] | r_p0[1] | r_p2[0] | r_p0[0]
  86.     fildl    C(r_p1)+4        // r_p1[1] | r_p1[0] | r_p2[1] | r_p0[1] |
  87.                             //  r_p2[0] | r_p0[0]
  88.     fxch    %st(3)            // r_p0[1] | r_p1[0] | r_p2[1] | r_p1[1] |
  89.                             //  r_p2[0] | r_p0[0]
  90.     fsub    %st(2),%st(0)    // p01_minus_p21 | r_p1[0] | r_p2[1] | r_p1[1] |
  91.                             //  r_p2[0] | r_p0[0]
  92.     fxch    %st(1)            // r_p1[0] | p01_minus_p21 | r_p2[1] | r_p1[1] |
  93.                             //  r_p2[0] | r_p0[0]
  94.     fsub    %st(4),%st(0)    // p10_minus_p20 | p01_minus_p21 | r_p2[1] |
  95.                             //  r_p1[1] | r_p2[0] | r_p0[0]
  96.     fxch    %st(5)            // r_p0[0] | p01_minus_p21 | r_p2[1] |
  97.                             //  r_p1[1] | r_p2[0] | p10_minus_p20
  98.     fsubp    %st(0),%st(4)    // p01_minus_p21 | r_p2[1] | r_p1[1] |
  99.                             //  p00_minus_p20 | p10_minus_p20
  100.     fxch    %st(2)            // r_p1[1] | r_p2[1] | p01_minus_p21 |
  101.                             //  p00_minus_p20 | p10_minus_p20
  102.     fsubp    %st(0),%st(1)    // p11_minus_p21 | p01_minus_p21 |
  103.                             //  p00_minus_p20 | p10_minus_p20
  104.     fxch    %st(1)            // p01_minus_p21 | p11_minus_p21 |
  105.                             //  p00_minus_p20 | p10_minus_p20
       // The C reference above computes xstepdenominv with a divide.  This code
       // performs no divide: it loads C(d_xdenom) and, from the second fstps
       // below onwards, the stack comments call that value xstepdenominv.
       // NOTE(review): presumably the caller precomputes d_xdenom -- verify.
       // p10_minus_p20 and p01_minus_p21 are spilled to memory here and used
       // as fmuls operands in every gradient section that follows.
  106.     flds    C(d_xdenom)        // d_xdenom | p01_minus_p21 | p11_minus_p21 |
  107.                             //  p00_minus_p20 | p10_minus_p20
  108.     fxch    %st(4)            // p10_minus_p20 | p01_minus_p21 | p11_minus_p21 |
  109.                             //  p00_minus_p20 | d_xdenom
  110.     fstps    p10_minus_p20    // p01_minus_p21 | p11_minus_p21 |
  111.                             //  p00_minus_p20 | d_xdenom
  112.     fstps    p01_minus_p21    // p11_minus_p21 | p00_minus_p20 | xstepdenominv
  113.     fxch    %st(2)            // xstepdenominv | p00_minus_p20 | p11_minus_p21
  114.  
  115. //// ceil () for light so positive steps are exaggerated, negative steps
  116. //// diminished,  pushing us away from underflow toward overflow. Underflow is
  117. //// very visible, overflow is very unlikely, because of ambient lighting
  118. //    t0 = r_p0[4] - r_p2[4];
  119. //    t1 = r_p1[4] - r_p2[4];
  120.  
  121.     fildl    C(r_p2)+16        // r_p2[4] | xstepdenominv | p00_minus_p20 |
  122.                             //  p11_minus_p21
  123.     fildl    C(r_p0)+16        // r_p0[4] | r_p2[4] | xstepdenominv |
  124.                             //  p00_minus_p20 | p11_minus_p21
  125.     fildl    C(r_p1)+16        // r_p1[4] | r_p0[4] | r_p2[4] | xstepdenominv |
  126.                             //  p00_minus_p20 | p11_minus_p21
  127.     fxch    %st(2)            // r_p2[4] | r_p0[4] | r_p1[4] | xstepdenominv |
  128.                             //  p00_minus_p20 | p11_minus_p21
  129.     fld        %st(0)            // r_p2[4] | r_p2[4] | r_p0[4] | r_p1[4] |
  130.                             //  xstepdenominv | p00_minus_p20 | p11_minus_p21
  131.     fsubrp    %st(0),%st(2)    // r_p2[4] | t0 | r_p1[4] | xstepdenominv |
  132.                             //  p00_minus_p20 | p11_minus_p21
  133.     fsubrp    %st(0),%st(2)    // t0 | t1 | xstepdenominv | p00_minus_p20 |
  134.                             //  p11_minus_p21
  135.  
  136. //    r_lstepx = (int)
  137. //            ceil((t1 * p01_minus_p21 - t0 * p11_minus_p21) * xstepdenominv);
  138. //    r_lstepy = (int)
  139. //            ceil((t1 * p00_minus_p20 - t0 * p10_minus_p20) * ystepdenominv);
  140.  
  141.     fld        %st(0)            // t0 | t0 | t1 | xstepdenominv | p00_minus_p20 |
  142.                             //  p11_minus_p21
  143.     fmul    %st(5),%st(0)    // t0*p11_minus_p21 | t0 | t1 | xstepdenominv |
  144.                             //  p00_minus_p20 | p11_minus_p21
  145.     fxch    %st(2)            // t1 | t0 | t0*p11_minus_p21 | xstepdenominv |
  146.                             //  p00_minus_p20 | p11_minus_p21
  147.     fld        %st(0)            // t1 | t1 | t0 | t0*p11_minus_p21 |
  148.                             //  xstepdenominv | p00_minus_p20 | p11_minus_p21
  149.     fmuls    p01_minus_p21    // t1*p01_minus_p21 | t1 | t0 | t0*p11_minus_p21 |
  150.                             //  xstepdenominv | p00_minus_p20 | p11_minus_p21
  151.     fxch    %st(2)            // t0 | t1 | t1*p01_minus_p21 | t0*p11_minus_p21 |
  152.                             //  xstepdenominv | p00_minus_p20 | p11_minus_p21
  153.     fmuls    p10_minus_p20    // t0*p10_minus_p20 | t1 | t1*p01_minus_p21 |
  154.                             //  t0*p11_minus_p21 | xstepdenominv |
  155.                             //  p00_minus_p20 | p11_minus_p21
  156.     fxch    %st(1)            // t1 | t0*p10_minus_p20 | t1*p01_minus_p21 |
  157.                             //  t0*p11_minus_p21 | xstepdenominv |
  158.                             //  p00_minus_p20 | p11_minus_p21
  159.     fmul    %st(5),%st(0)    // t1*p00_minus_p20 | t0*p10_minus_p20 |
  160.                             //  t1*p01_minus_p21 | t0*p11_minus_p21 |
  161.                             //  xstepdenominv | p00_minus_p20 | p11_minus_p21
  162.     fxch    %st(2)            // t1*p01_minus_p21 | t0*p10_minus_p20 |
  163.                             //  t1*p00_minus_p20 | t0*p11_minus_p21 |
  164.                             //  xstepdenominv | p00_minus_p20 | p11_minus_p21
  165.     fsubp    %st(0),%st(3)    // t0*p10_minus_p20 | t1*p00_minus_p20 |
  166.                             //  t1*p01_minus_p21 - t0*p11_minus_p21 |
  167.                             //  xstepdenominv | p00_minus_p20 | p11_minus_p21
  168.     fsubrp    %st(0),%st(1)    // t1*p00_minus_p20 - t0*p10_minus_p20 |
  169.                             //  t1*p01_minus_p21 - t0*p11_minus_p21 |
  170.                             //  xstepdenominv | p00_minus_p20 | p11_minus_p21
  171.     fld        %st(2)            // xstepdenominv |
  172.                             //  t1*p00_minus_p20 - t0*p10_minus_p20 |
  173.                             //  t1*p01_minus_p21 - t0*p11_minus_p21 |
  174.                             //  xstepdenominv | p00_minus_p20 | p11_minus_p21
  175.     fmuls    float_minus_1    // ystepdenominv |
  176.                             //  t1*p00_minus_p20 - t0*p10_minus_p20 |
  177.                             //  t1*p01_minus_p21 - t0*p11_minus_p21 |
  178.                             //  xstepdenominv | p00_minus_p20 | p11_minus_p21
  179.     fxch    %st(2)            // t1*p01_minus_p21 - t0*p11_minus_p21 |
  180.                             //  t1*p00_minus_p20 - t0*p10_minus_p20 |
  181.                             //  ystepdenominv | xstepdenominv | p00_minus_p20 |
  182.                             //  p11_minus_p21
  183.     fmul    %st(3),%st(0)    // (t1*p01_minus_p21 - t0*p11_minus_p21)*
  184.                             //   xstepdenominv |
  185.                             //  t1*p00_minus_p20 - t0*p10_minus_p20 |
  186.                             //   ystepdenominv | xstepdenominv |
  187.                             //   p00_minus_p20 | p11_minus_p21
  188.     fxch    %st(1)            // t1*p00_minus_p20 - t0*p10_minus_p20 |
  189.                             //  (t1*p01_minus_p21 - t0*p11_minus_p21)*
  190.                             //   xstepdenominv | ystepdenominv |
  191.                             //   xstepdenominv | p00_minus_p20 | p11_minus_p21
  192.     fmul    %st(2),%st(0)    // (t1*p00_minus_p20 - t0*p10_minus_p20)*
  193.                             //  ystepdenominv |
  194.                             //  (t1*p01_minus_p21 - t0*p11_minus_p21)*
  195.                             //  xstepdenominv | ystepdenominv |
  196.                             //  xstepdenominv | p00_minus_p20 | p11_minus_p21
       // Only the two lighting stores run under ceil_cw, matching the ceil()
       // in the C reference above.  ceil_cw is defined outside this view.
       // NOTE(review): presumably it selects round-toward-plus-infinity --
       // confirm at its definition.
  197.     fldcw    ceil_cw
  198.     fistpl    C(r_lstepy)        // r_lstepx | ystepdenominv | xstepdenominv |
  199.                             //  p00_minus_p20 | p11_minus_p21
  200.     fistpl    C(r_lstepx)        // ystepdenominv | xstepdenominv | p00_minus_p20 |
  201.                             //  p11_minus_p21
       // single_cw stays loaded for the rest of the routine, so every later
       // fistpl rounds according to it.
       // NOTE(review): the C reference uses an (int) cast for those values --
       // confirm which rounding mode single_cw selects.
  202.     fldcw    single_cw
  203.  
  204. //    t0 = r_p0[2] - r_p2[2];
  205. //    t1 = r_p1[2] - r_p2[2];
  206.  
  207.     fildl    C(r_p2)+8        // r_p2[2] | ystepdenominv | xstepdenominv |
  208.                             //  p00_minus_p20 | p11_minus_p21
  209.     fildl    C(r_p0)+8        // r_p0[2] | r_p2[2] | ystepdenominv |
  210.                             //   xstepdenominv | p00_minus_p20 | p11_minus_p21
  211.     fildl    C(r_p1)+8        // r_p1[2] | r_p0[2] | r_p2[2] | ystepdenominv |
  212.                             //  xstepdenominv | p00_minus_p20 | p11_minus_p21
  213.     fxch    %st(2)            // r_p2[2] | r_p0[2] | r_p1[2] | ystepdenominv |
  214.                             //  xstepdenominv | p00_minus_p20 | p11_minus_p21
  215.     fld        %st(0)            // r_p2[2] | r_p2[2] | r_p0[2] | r_p1[2] |
  216.                             //  ystepdenominv | xstepdenominv | p00_minus_p20 |
  217.                             //  p11_minus_p21
  218.     fsubrp    %st(0),%st(2)    // r_p2[2] | t0 | r_p1[2] | ystepdenominv |
  219.                             //  xstepdenominv | p00_minus_p20 | p11_minus_p21
  220.     fsubrp    %st(0),%st(2)    // t0 | t1 | ystepdenominv | xstepdenominv |
  221.                             //  p00_minus_p20 | p11_minus_p21
  222.  
  223. //    r_sstepx = (int)((t1 * p01_minus_p21 - t0 * p11_minus_p21) *
  224. //            xstepdenominv);
  225. //    r_sstepy = (int)((t1 * p00_minus_p20 - t0 * p10_minus_p20) *
  226. //            ystepdenominv);
  227.  
  228.     fld        %st(0)            // t0 | t0 | t1 | ystepdenominv | xstepdenominv |
                                   //  p00_minus_p20 | p11_minus_p21
  229.     fmul    %st(6),%st(0)    // t0*p11_minus_p21 | t0 | t1 | ystepdenominv |
  230.                             //  xstepdenominv | p00_minus_p20 | p11_minus_p21
  231.     fxch    %st(2)            // t1 | t0 | t0*p11_minus_p21 | ystepdenominv |
  232.                             //  xstepdenominv | p00_minus_p20 | p11_minus_p21
  233.     fld        %st(0)            // t1 | t1 | t0 | t0*p11_minus_p21 |
  234.                             //  ystepdenominv | xstepdenominv | p00_minus_p20 |
  235.                             //  p11_minus_p21
  236.     fmuls    p01_minus_p21    // t1*p01_minus_p21 | t1 | t0 | t0*p11_minus_p21 |
  237.                             //  ystepdenominv | xstepdenominv | p00_minus_p20 |
  238.                             //  p11_minus_p21
  239.     fxch    %st(2)            // t0 | t1 | t1*p01_minus_p21 | t0*p11_minus_p21 |
  240.                             //  ystepdenominv | xstepdenominv | p00_minus_p20 |
  241.                             //  p11_minus_p21
  242.     fmuls    p10_minus_p20    // t0*p10_minus_p20 | t1 | t1*p01_minus_p21 |
  243.                             //  t0*p11_minus_p21 | ystepdenominv |
  244.                             //  xstepdenominv | p00_minus_p20 | p11_minus_p21
  245.     fxch    %st(1)            // t1 | t0*p10_minus_p20 | t1*p01_minus_p21 |
  246.                             //  t0*p11_minus_p21 | ystepdenominv |
  247.                             //  xstepdenominv | p00_minus_p20 | p11_minus_p21
  248.     fmul    %st(6),%st(0)    // t1*p00_minus_p20 | t0*p10_minus_p20 |
  249.                             //  t1*p01_minus_p21 | t0*p11_minus_p21 |
  250.                             //  ystepdenominv | xstepdenominv | p00_minus_p20 |
  251.                             //  p11_minus_p21
  252.     fxch    %st(2)            // t1*p01_minus_p21 | t0*p10_minus_p20 |
  253.                             //  t1*p00_minus_p20 | t0*p11_minus_p21 |
  254.                             //  ystepdenominv | xstepdenominv | p00_minus_p20 |
  255.                             //  p11_minus_p21
  256.     fsubp    %st(0),%st(3)    // t0*p10_minus_p20 | t1*p00_minus_p20 |
  257.                             //  t1*p01_minus_p21 - t0*p11_minus_p21 |
  258.                             //  ystepdenominv | xstepdenominv | p00_minus_p20 |
  259.                             //  p11_minus_p21
  260.     fsubrp    %st(0),%st(1)    // t1*p00_minus_p20 - t0*p10_minus_p20 |
  261.                             //  t1*p01_minus_p21 - t0*p11_minus_p21 |
  262.                             //  ystepdenominv | xstepdenominv | p00_minus_p20 |
  263.                             //  p11_minus_p21
  264.     fmul    %st(2),%st(0)    // (t1*p00_minus_p20 - t0*p10_minus_p20)*
  265.                             //   ystepdenominv |
  266.                             //  t1*p01_minus_p21 - t0*p11_minus_p21 |
  267.                             //  ystepdenominv | xstepdenominv | p00_minus_p20 |
  268.                             //  p11_minus_p21
  269.     fxch    %st(1)            // t1*p01_minus_p21 - t0*p11_minus_p21 |
  270.                             //  (t1*p00_minus_p20 - t0*p10_minus_p20)*
  271.                             //   ystepdenominv | ystepdenominv |
  272.                             //  xstepdenominv | p00_minus_p20 | p11_minus_p21
  273.     fmul    %st(3),%st(0)    // (t1*p01_minus_p21 - t0*p11_minus_p21)*
  274.                             //  xstepdenominv |
  275.                             //  (t1*p00_minus_p20 - t0*p10_minus_p20)*
  276.                             //  ystepdenominv | ystepdenominv |
  277.                             //  xstepdenominv | p00_minus_p20 | p11_minus_p21
  278.     fxch    %st(1)            // (t1*p00_minus_p20 - t0*p10_minus_p20)*
  279.                             //  ystepdenominv |
  280.                             //  (t1*p01_minus_p21 - t0*p11_minus_p21)*
  281.                             //  xstepdenominv | ystepdenominv |
  282.                             //  xstepdenominv | p00_minus_p20 | p11_minus_p21
  283.     fistpl    C(r_sstepy)        // r_sstepx | ystepdenominv | xstepdenominv |
  284.                             //  p00_minus_p20 | p11_minus_p21
  285.     fistpl    C(r_sstepx)        // ystepdenominv | xstepdenominv | p00_minus_p20 |
  286.                             //  p11_minus_p21
  287.  
  288. //    t0 = r_p0[3] - r_p2[3];
  289. //    t1 = r_p1[3] - r_p2[3];
  290.  
  291.     fildl    C(r_p2)+12        // r_p2[3] | ystepdenominv | xstepdenominv |
  292.                             //  p00_minus_p20 | p11_minus_p21
  293.     fildl    C(r_p0)+12        // r_p0[3] | r_p2[3] | ystepdenominv |
  294.                             //  xstepdenominv | p00_minus_p20 | p11_minus_p21
  295.     fildl    C(r_p1)+12        // r_p1[3] | r_p0[3] | r_p2[3] | ystepdenominv |
  296.                             //  xstepdenominv | p00_minus_p20 | p11_minus_p21
  297.     fxch    %st(2)            // r_p2[3] | r_p0[3] | r_p1[3] | ystepdenominv |
  298.                             //  xstepdenominv | p00_minus_p20 | p11_minus_p21
  299.     fld        %st(0)            // r_p2[3] | r_p2[3] | r_p0[3] | r_p1[3] |
  300.                             //  ystepdenominv | xstepdenominv | p00_minus_p20 |
  301.                             //  p11_minus_p21
  302.     fsubrp    %st(0),%st(2)    // r_p2[3] | t0 | r_p1[3] | ystepdenominv |
  303.                             //  xstepdenominv | p00_minus_p20 | p11_minus_p21
  304.     fsubrp    %st(0),%st(2)    // t0 | t1 | ystepdenominv | xstepdenominv |
  305.                             //  p00_minus_p20 | p11_minus_p21
  306.  
  307. //    r_tstepx = (int)((t1 * p01_minus_p21 - t0 * p11_minus_p21) *
  308. //            xstepdenominv);
  309. //    r_tstepy = (int)((t1 * p00_minus_p20 - t0 * p10_minus_p20) *
  310. //            ystepdenominv);
  311.  
  312.     fld        %st(0)            // t0 | t0 | t1 | ystepdenominv | xstepdenominv |
  313.                             //  p00_minus_p20 | p11_minus_p21
  314.     fmul    %st(6),%st(0)    // t0*p11_minus_p21 | t0 | t1 | ystepdenominv |
  315.                             //  xstepdenominv | p00_minus_p20 | p11_minus_p21
  316.     fxch    %st(2)            // t1 | t0 | t0*p11_minus_p21 | ystepdenominv |
  317.                             //  xstepdenominv | p00_minus_p20 | p11_minus_p21
  318.     fld        %st(0)            // t1 | t1 | t0 | t0*p11_minus_p21 |
  319.                             //  ystepdenominv | xstepdenominv | p00_minus_p20 |
  320.                             //  p11_minus_p21
  321.     fmuls    p01_minus_p21    // t1*p01_minus_p21 | t1 | t0 | t0*p11_minus_p21 |
  322.                             //  ystepdenominv | xstepdenominv | p00_minus_p20 |
  323.                             //  p11_minus_p21
  324.     fxch    %st(2)            // t0 | t1 | t1*p01_minus_p21 | t0*p11_minus_p21 |
  325.                             //  ystepdenominv | xstepdenominv | p00_minus_p20 |
  326.                             //  p11_minus_p21
  327.     fmuls    p10_minus_p20    // t0*p10_minus_p20 | t1 | t1*p01_minus_p21 |
  328.                             //  t0*p11_minus_p21 | ystepdenominv |
  329.                             //  xstepdenominv | p00_minus_p20 | p11_minus_p21
  330.     fxch    %st(1)            // t1 | t0*p10_minus_p20 | t1*p01_minus_p21 |
  331.                             //  t0*p11_minus_p21 | ystepdenominv |
  332.                             //  xstepdenominv | p00_minus_p20 | p11_minus_p21
  333.     fmul    %st(6),%st(0)    // t1*p00_minus_p20 | t0*p10_minus_p20 |
  334.                             //  t1*p01_minus_p21 | t0*p11_minus_p21 |
  335.                             //  ystepdenominv | xstepdenominv | p00_minus_p20 |
  336.                             //  p11_minus_p21
  337.     fxch    %st(2)            // t1*p01_minus_p21 | t0*p10_minus_p20 |
  338.                             //  t1*p00_minus_p20 | t0*p11_minus_p21 |
  339.                             //  ystepdenominv | xstepdenominv | p00_minus_p20 |
  340.                             //  p11_minus_p21
  341.     fsubp    %st(0),%st(3)    // t0*p10_minus_p20 | t1*p00_minus_p20 |
  342.                             //  t1*p01_minus_p21 - t0*p11_minus_p21 |
  343.                             //  ystepdenominv | xstepdenominv | p00_minus_p20 |
  344.                             //  p11_minus_p21
  345.     fsubrp    %st(0),%st(1)    // t1*p00_minus_p20 - t0*p10_minus_p20 |
  346.                             //  t1*p01_minus_p21 - t0*p11_minus_p21 |
  347.                             //  ystepdenominv | xstepdenominv | p00_minus_p20 |
  348.                             //  p11_minus_p21
  349.     fmul    %st(2),%st(0)    // (t1*p00_minus_p20 - t0*p10_minus_p20)*
  350.                             //   ystepdenominv |
  351.                             //  t1*p01_minus_p21 - t0*p11_minus_p21 |
  352.                             //  ystepdenominv | xstepdenominv | p00_minus_p20 |
  353.                             //  p11_minus_p21
  354.     fxch    %st(1)            // t1*p01_minus_p21 - t0*p11_minus_p21 |
  355.                             //  (t1*p00_minus_p20 - t0*p10_minus_p20)*
  356.                             //  ystepdenominv | ystepdenominv |
  357.                             //  xstepdenominv | p00_minus_p20 | p11_minus_p21
  358.     fmul    %st(3),%st(0)    // (t1*p01_minus_p21 - t0*p11_minus_p21)*
  359.                             //  xstepdenominv |
  360.                             //  (t1*p00_minus_p20 - t0*p10_minus_p20)*
  361.                             //  ystepdenominv | ystepdenominv |
  362.                             //  xstepdenominv | p00_minus_p20 | p11_minus_p21
  363.     fxch    %st(1)            // (t1*p00_minus_p20 - t0*p10_minus_p20)*
  364.                             //  ystepdenominv |
  365.                             //  (t1*p01_minus_p21 - t0*p11_minus_p21)*
  366.                             //  xstepdenominv | ystepdenominv |
  367.                             //  xstepdenominv | p00_minus_p20 | p11_minus_p21
  368.     fistpl    C(r_tstepy)        // r_tstepx | ystepdenominv | xstepdenominv |
  369.                             //  p00_minus_p20 | p11_minus_p21
  370.     fistpl    C(r_tstepx)        // ystepdenominv | xstepdenominv | p00_minus_p20 |
  371.                             //  p11_minus_p21
  372.  
  373. //    t0 = r_p0[5] - r_p2[5];
  374. //    t1 = r_p1[5] - r_p2[5];
  375.  
  376.     fildl    C(r_p2)+20        // r_p2[5] | ystepdenominv | xstepdenominv |
  377.                             //  p00_minus_p20 | p11_minus_p21
  378.     fildl    C(r_p0)+20        // r_p0[5] | r_p2[5] | ystepdenominv |
  379.                             //  xstepdenominv | p00_minus_p20 | p11_minus_p21
  380.     fildl    C(r_p1)+20        // r_p1[5] | r_p0[5] | r_p2[5] | ystepdenominv |
  381.                             //  xstepdenominv | p00_minus_p20 | p11_minus_p21
  382.     fxch    %st(2)            // r_p2[5] | r_p0[5] | r_p1[5] | ystepdenominv |
  383.                             //  xstepdenominv | p00_minus_p20 | p11_minus_p21
  384.     fld        %st(0)            // r_p2[5] | r_p2[5] | r_p0[5] | r_p1[5] |
  385.                             //  ystepdenominv | xstepdenominv | p00_minus_p20 |
  386.                             //  p11_minus_p21
  387.     fsubrp    %st(0),%st(2)    // r_p2[5] | t0 | r_p1[5] | ystepdenominv |
  388.                             //  xstepdenominv | p00_minus_p20 | p11_minus_p21
  389.     fsubrp    %st(0),%st(2)    // t0 | t1 | ystepdenominv | xstepdenominv |
  390.                             //  p00_minus_p20 | p11_minus_p21
  391.  
  392. //    r_zistepx = (int)((t1 * p01_minus_p21 - t0 * p11_minus_p21) *
  393. //            xstepdenominv);
  394. //    r_zistepy = (int)((t1 * p00_minus_p20 - t0 * p10_minus_p20) *
  395. //            ystepdenominv);
  396.  
       // This is the last gradient, so unlike the sections above it multiplies
       // in place with fmulp and consumes p11_minus_p21, p00_minus_p20,
       // xstepdenominv and ystepdenominv.  The x87 stack is empty again after
       // the two fistpl stores.
  397.     fld        %st(0)            // t0 | t0 | t1 | ystepdenominv | xstepdenominv |
  398.                             //  p00_minus_p20 | p11_minus_p21
  399.     fmulp    %st(0),%st(6)    // t0 | t1 | ystepdenominv | xstepdenominv |
  400.                             //  p00_minus_p20 | t0*p11_minus_p21
  401.     fxch    %st(1)            // t1 | t0 | ystepdenominv | xstepdenominv |
  402.                             //  p00_minus_p20 | t0*p11_minus_p21
  403.     fld        %st(0)            // t1 | t1 | t0 | ystepdenominv | xstepdenominv |
  404.                             //  p00_minus_p20 | t0*p11_minus_p21
  405.     fmuls    p01_minus_p21    // t1*p01_minus_p21 | t1 | t0 | ystepdenominv |
  406.                             //  xstepdenominv | p00_minus_p20 |
  407.                             //  t0*p11_minus_p21
  408.     fxch    %st(2)            // t0 | t1 | t1*p01_minus_p21 | ystepdenominv |
  409.                             //  xstepdenominv | p00_minus_p20 |
  410.                             //  t0*p11_minus_p21
  411.     fmuls    p10_minus_p20    // t0*p10_minus_p20 | t1 | t1*p01_minus_p21 |
  412.                             //  ystepdenominv | xstepdenominv | p00_minus_p20 |
  413.                             //  t0*p11_minus_p21
  414.     fxch    %st(1)            // t1 | t0*p10_minus_p20 | t1*p01_minus_p21 |
  415.                             //  ystepdenominv | xstepdenominv | p00_minus_p20 |
  416.                             //  t0*p11_minus_p21
  417.     fmulp    %st(0),%st(5)    // t0*p10_minus_p20 | t1*p01_minus_p21 |
  418.                             //  ystepdenominv | xstepdenominv |
  419.                             //  t1*p00_minus_p20 | t0*p11_minus_p21
  420.     fxch    %st(5)            // t0*p11_minus_p21 | t1*p01_minus_p21 |
  421.                             //  ystepdenominv | xstepdenominv |
  422.                             //  t1*p00_minus_p20 | t0*p10_minus_p20
  423.     fsubrp    %st(0),%st(1)    // t1*p01_minus_p21 - t0*p11_minus_p21 |
  424.                             //  ystepdenominv | xstepdenominv |
  425.                             //  t1*p00_minus_p20 | t0*p10_minus_p20
  426.     fxch    %st(3)            // t1*p00_minus_p20 | ystepdenominv |
  427.                             //  xstepdenominv |
  428.                             //  t1*p01_minus_p21 - t0*p11_minus_p21 |
  429.                             //  t0*p10_minus_p20
  430.     fsubp    %st(0),%st(4)    // ystepdenominv | xstepdenominv |
  431.                             //  t1*p01_minus_p21 - t0*p11_minus_p21 |
  432.                             //  t1*p00_minus_p20 - t0*p10_minus_p20
  433.     fxch    %st(1)            // xstepdenominv | ystepdenominv |
  434.                             //  t1*p01_minus_p21 - t0*p11_minus_p21 |
  435.                             //  t1*p00_minus_p20 - t0*p10_minus_p20
  436.     fmulp    %st(0),%st(2)    // ystepdenominv |
  437.                             //  (t1*p01_minus_p21 - t0*p11_minus_p21) *
  438.                             //  xstepdenominv |
  439.                             //  t1*p00_minus_p20 - t0*p10_minus_p20
  440.     fmulp    %st(0),%st(2)    // (t1*p01_minus_p21 - t0*p11_minus_p21) *
  441.                             //  xstepdenominv |
  442.                             //  (t1*p00_minus_p20 - t0*p10_minus_p20) *
  443.                             //  ystepdenominv
  444.     fistpl    C(r_zistepx)    // (t1*p00_minus_p20 - t0*p10_minus_p20) *
  445.                             //  ystepdenominv
  446.     fistpl    C(r_zistepy)
  447.  
  448. //    a_sstepxfrac = r_sstepx << 16;
  449. //    a_tstepxfrac = r_tstepx << 16;
  450. //
  451. //    a_ststepxwhole = r_affinetridesc.skinwidth * (r_tstepx >> 16) +
  452. //            (r_sstepx >> 16);
  453.  
       // Low 16 bits of each step, moved into the top half of a 32-bit word.
  454.     movl    C(r_sstepx),%eax
  455.     movl    C(r_tstepx),%edx
  456.     shll    $16,%eax
  457.     shll    $16,%edx
  458.     movl    %eax,C(a_sstepxfrac)
  459.     movl    %edx,C(a_tstepxfrac)
  460.  
       // Arithmetic shifts keep the sign of the upper 16 bits.  The one-operand
       // imull multiplies %eax by the stack argument and writes %edx:%eax; only
       // the low 32 bits in %eax are used and %edx is discarded.
  461.     movl    C(r_sstepx),%ecx
  462.     movl    C(r_tstepx),%eax
  463.     sarl    $16,%ecx
  464.     sarl    $16,%eax
  465.     imull    skinwidth(%esp)
  466.     addl    %ecx,%eax
  467.     movl    %eax,C(a_ststepxwhole)
  468.  
  469.     ret
  470.  
  471.  
  472. //----------------------------------------------------------------------
  473. // recursive subdivision affine triangle drawing code
  474. //
  475. // not C-callable because of stdcall return
  476. //----------------------------------------------------------------------
  477.  
  478. #define lp1    4+16
  479. #define lp2    8+16
  480. #define lp3    12+16
  481.  
  482. .globl C(D_PolysetRecursiveTriangle)
  483. C(D_PolysetRecursiveTriangle):    // recursive subdivision fill of one triangle; the three vertex pointers are read via lp1/lp2/lp3(%esp) and popped by this routine (ret $12)
  484.     pushl    %ebp                // preserve caller stack frame pointer
  485.     pushl    %esi                // preserve register variables
  486.     pushl    %edi
  487.     pushl    %ebx
  488. // register roles from here on: %ebx = lp1, %esi = lp2, %edi = lp3
  489. //    int        *temp;
  490. //    int        d;
  491. //    int        new[6];
  492. //    int        i;
  493. //    int        z;
  494. //    short    *zbuf;
  495.     movl    lp2(%esp),%esi
  496.     movl    lp1(%esp),%ebx
  497.     movl    lp3(%esp),%edi
  498. 
  499. //    d = lp2[0] - lp1[0];
  500. //    if (d < -1 || d > 1)
  501. //        goto split;
  502.     movl    0(%esi),%eax
  503. 
  504.     movl    0(%ebx),%edx
  505.     movl    4(%esi),%ebp
  506. 
  507.     subl    %edx,%eax
  508.     movl    4(%ebx),%ecx
  509. 
  510.     subl    %ecx,%ebp
  511.     incl    %eax                // bias d by 1: unsigned (d+1) > 2 is the same as d < -1 || d > 1
  512. 
  513.     cmpl    $2,%eax
  514.     ja        LSplit
  515. 
  516. //    d = lp2[1] - lp1[1];
  517. //    if (d < -1 || d > 1)
  518. //        goto split;
  519.     movl    0(%edi),%eax
  520.     incl    %ebp                // same biased range test for the y delta computed above
  521. 
  522.     cmpl    $2,%ebp
  523.     ja        LSplit
  524. 
  525. //    d = lp3[0] - lp2[0];
  526. //    if (d < -1 || d > 1)
  527. //        goto split2;
  528.     movl    0(%esi),%edx
  529.     movl    4(%edi),%ebp
  530. 
  531.     subl    %edx,%eax
  532.     movl    4(%esi),%ecx
  533. 
  534.     subl    %ecx,%ebp
  535.     incl    %eax
  536. 
  537.     cmpl    $2,%eax
  538.     ja        LSplit2
  539. 
  540. //    d = lp3[1] - lp2[1];
  541. //    if (d < -1 || d > 1)
  542. //        goto split2;
  543.     movl    0(%ebx),%eax
  544.     incl    %ebp
  545. 
  546.     cmpl    $2,%ebp
  547.     ja        LSplit2
  548. 
  549. //    d = lp1[0] - lp3[0];
  550. //    if (d < -1 || d > 1)
  551. //        goto split3;
  552.     movl    0(%edi),%edx
  553.     movl    4(%ebx),%ebp
  554. 
  555.     subl    %edx,%eax
  556.     movl    4(%edi),%ecx
  557. 
  558.     subl    %ecx,%ebp
  559.     incl    %eax
  560. 
  561.     incl    %ebp
  562.     movl    %ebx,%edx            // temp = lp1, consumed only if LSplit3 is reached
  563. 
  564.     cmpl    $2,%eax
  565.     ja        LSplit3
  566. 
  567. //    d = lp1[1] - lp3[1];
  568. //    if (d < -1 || d > 1)
  569. //    {
  570. //split3:
  571. //        temp = lp1;
  572. //        lp1 = lp3;
  573. //        lp3 = lp2;
  574. //        lp2 = temp;
  575. //        goto split;
  576. //    }
  577. //
  578. //    return;            // entire tri is filled
  579. //
  580.     cmpl    $2,%ebp
  581.     jna        LDone
  582. 
  583. LSplit3:
  584.     movl    %edi,%ebx            // lp1 = lp3
  585.     movl    %esi,%edi            // lp3 = lp2
  586.     movl    %edx,%esi            // lp2 = temp (the old lp1 saved in %edx above)
  587.     jmp        LSplit
  588. 
  589. //split2:
  590. LSplit2:
  591. 
  592. //    temp = lp1;
  593. //    lp1 = lp2;
  594. //    lp2 = lp3;
  595. //    lp3 = temp;
  596.     movl    %ebx,%eax
  597.     movl    %esi,%ebx
  598.     movl    %edi,%esi
  599.     movl    %eax,%edi
  600. 
  601. //split:
  602. LSplit:
  603. 
  604.     subl    $24,%esp        // allocate space for a new vertex
  605. // new[] lives at 0..20(%esp) until the addl $24 before LDone; new[4] (offset 16) is never written
  606. //// split this edge
  607. //    new[0] = (lp1[0] + lp2[0]) >> 1;
  608. //    new[1] = (lp1[1] + lp2[1]) >> 1;
  609. //    new[2] = (lp1[2] + lp2[2]) >> 1;
  610. //    new[3] = (lp1[3] + lp2[3]) >> 1;
  611. //    new[5] = (lp1[5] + lp2[5]) >> 1;
  612.     movl    8(%ebx),%eax
  613. 
  614.     movl    8(%esi),%edx
  615.     movl    12(%ebx),%ecx
  616. 
  617.     addl    %edx,%eax
  618.     movl    12(%esi),%edx
  619. 
  620.     sarl    $1,%eax
  621.     addl    %edx,%ecx
  622. 
  623.     movl    %eax,8(%esp)
  624.     movl    20(%ebx),%eax
  625. 
  626.     sarl    $1,%ecx
  627.     movl    20(%esi),%edx
  628. 
  629.     movl    %ecx,12(%esp)
  630.     addl    %edx,%eax
  631. 
  632.     movl    0(%ebx),%ecx        // %ecx = lp1[0], still needed by the x tie-break compare below
  633.     movl    0(%esi),%edx
  634. 
  635.     sarl    $1,%eax
  636.     addl    %ecx,%edx
  637. 
  638.     movl    %eax,20(%esp)
  639.     movl    4(%ebx),%eax        // %eax = lp1[1], still needed by the y compare below
  640. 
  641.     sarl    $1,%edx
  642.     movl    4(%esi),%ebp
  643. 
  644.     movl    %edx,0(%esp)
  645.     addl    %eax,%ebp
  646. 
  647.     sarl    $1,%ebp
  648.     movl    %ebp,4(%esp)
  649. 
  650. //// draw the point if splitting a leading edge
  651. //    if (lp2[1] > lp1[1])
  652. //        goto nodraw;
  653.     cmpl    %eax,4(%esi)
  654.     jg        LNoDraw
  655. 
  656. //    if ((lp2[1] == lp1[1]) && (lp2[0] < lp1[0]))
  657. //        goto nodraw;
  658.     movl    0(%esi),%edx        // movl leaves the flags from the cmpl above intact for the jnz
  659.     jnz        LDraw
  660. 
  661.     cmpl    %ecx,%edx
  662.     jl        LNoDraw
  663. 
  664. LDraw:
  665. 
  666. // z = new[5] >> 16;
  667.     movl    20(%esp),%edx
  668.     movl    4(%esp),%ecx
  669. 
  670.     sarl    $16,%edx
  671.     movl    0(%esp),%ebp
  672. 
  673. //    zbuf = zspantable[new[1]] + new[0];
  674.     movl    C(zspantable)(,%ecx,4),%eax
  675. 
  676. //    if (z >= *zbuf)
  677. //    {
  678.     cmpw    (%eax,%ebp,2),%dx
  679.     jnge    LNoDraw
  680. 
  681. //        int        pix;
  682. //        
  683. //        *zbuf = z;
  684.     movw    %dx,(%eax,%ebp,2)
  685. 
  686. //        pix = d_pcolormap[skintable[new[3]>>16][new[2]>>16]];
  687.     movl    12(%esp),%eax
  688. 
  689.     sarl    $16,%eax
  690.     movl    8(%esp),%edx
  691. 
  692.     sarl    $16,%edx
  693.     subl    %ecx,%ecx            // zero %ecx so the byte loaded into %cl can be used as a full index
  694. 
  695.     movl    C(skintable)(,%eax,4),%eax
  696.     movl    4(%esp),%ebp
  697. 
  698.     movb    (%eax,%edx,),%cl
  699.     movl    C(d_pcolormap),%edx
  700. 
  701.     movb    (%edx,%ecx,),%dl
  702.     movl    0(%esp),%ecx
  703. 
  704. //        d_viewbuffer[d_scantable[new[1]] + new[0]] = pix;
  705.     movl    C(d_scantable)(,%ebp,4),%eax
  706.     addl    %eax,%ecx
  707.     movl    C(d_viewbuffer),%eax
  708.     movb    %dl,(%eax,%ecx,1)
  709. 
  710. //    }
  711. //
  712. //nodraw:
  713. LNoDraw:
  714. 
  715. //// recursively continue
  716. //    D_PolysetRecursiveTriangle (lp3, lp1, new);
  717.     pushl    %esp                // third arg = &new[0] (%esp as it was before this push)
  718.     pushl    %ebx
  719.     pushl    %edi
  720.     call    C(D_PolysetRecursiveTriangle)    // callee pops its three args and preserves %ebx/%esi/%edi/%ebp
  721. 
  722. //    D_PolysetRecursiveTriangle (lp3, new, lp2);
  723.     movl    %esp,%ebx            // %esp points at new[] again; lp1 is no longer needed
  724.     pushl    %esi
  725.     pushl    %ebx
  726.     pushl    %edi
  727.     call    C(D_PolysetRecursiveTriangle)
  728.     addl    $24,%esp            // release new[]
  729. 
  730. LDone:
  731.     popl    %ebx                // restore register variables
  732.     popl    %edi
  733.     popl    %esi
  734.     popl    %ebp                // restore caller stack frame pointer
  735.     ret        $12                // return and discard the three pointer arguments (callee-pops)
  736.  
  737.  
  738. //----------------------------------------------------------------------
  739. // 8-bpp horizontal span drawing code for affine polygons, with smooth
  740. // shading and no transparency
  741. //----------------------------------------------------------------------
  742.  
  743. #define pspans    4+8            // offset of the single stack argument after the first two pushes below
  744. 
  745. .globl C(D_PolysetAff8Start)    // D_PolysetAff8Start / D_PolysetAff8End bracket the code containing the LPatchN sites written by D_Aff8Patch
  746. C(D_PolysetAff8Start):
  747. 
  748. .globl C(D_PolysetDrawSpans8)    // draws every span in the list at pspans until a span whose count is -999999
  749. C(D_PolysetDrawSpans8):
  750.     pushl    %esi                // preserve register variables
  751.     pushl    %ebx
  752. 
  753.     movl    pspans(%esp),%esi    // point to the first span descriptor
  754.     movl    C(r_zistepx),%ecx
  755. 
  756.     pushl    %ebp                // preserve caller's stack frame
  757.     pushl    %edi
  758. 
  759.     rorl    $16,%ecx            // put high 16 bits of 1/z step in low word
  760.     movl    spanpackage_t_count(%esi),%edx
  761. 
  762.     movl    %ecx,lzistepx
  763. 
  764. LSpanLoop:
  765. 
  766. //        lcount = d_aspancount - pspanpackage->count;
  767. //
  768. //        errorterm += erroradjustup;
  769. //        if (errorterm >= 0)
  770. //        {
  771. //            d_aspancount += d_countextrastep;
  772. //            errorterm -= erroradjustdown;
  773. //        }
  774. //        else
  775. //        {
  776. //            d_aspancount += ubasestep;
  777. //        }
  778.     movl    C(d_aspancount),%eax
  779.     subl    %edx,%eax            // %eax = lcount; stays live until LRightEdgeStepped
  780. 
  781.     movl    C(erroradjustup),%edx
  782.     movl    C(errorterm),%ebx
  783.     addl    %edx,%ebx
  784.     js        LNoTurnover
  785. 
  786.     movl    C(erroradjustdown),%edx
  787.     movl    C(d_countextrastep),%edi
  788.     subl    %edx,%ebx
  789.     movl    C(d_aspancount),%ebp
  790.     movl    %ebx,C(errorterm)
  791.     addl    %edi,%ebp
  792.     movl    %ebp,C(d_aspancount)
  793.     jmp        LRightEdgeStepped
  794. 
  795. LNoTurnover:
  796.     movl    C(d_aspancount),%edi
  797.     movl    C(ubasestep),%edx
  798.     movl    %ebx,C(errorterm)
  799.     addl    %edx,%edi
  800.     movl    %edi,C(d_aspancount)
  801. 
  802. LRightEdgeStepped:
  803.     cmpl    $1,%eax                // one compare feeds both branches: lcount < 1 skips the span, lcount == 1 takes the short path
  804. 
  805.     jl        LNextSpan
  806.     jz        LExactlyOneLong
  807. 
  808. //
  809. // set up advancetable
  810. //
  811.     movl    C(a_ststepxwhole),%ecx
  812.     movl    C(r_affinetridesc)+atd_skinwidth,%edx
  813. 
  814.     movl    %ecx,advancetable+4    // advance base in t
  815.     addl    %edx,%ecx
  816. 
  817.     movl    %ecx,advancetable    // advance extra in t
  818.     movl    C(a_tstepxfrac),%ecx
  819. 
  820.     movw    C(r_lstepx),%cx        // low word of tstep carries the light step (see register notes below)
  821.     movl    %eax,%edx            // count
  822. 
  823.     movl    %ecx,tstep
  824.     addl    $7,%edx
  825. 
  826.     shrl    $3,%edx                // count of full and partial loops
  827.     movl    spanpackage_t_sfrac(%esi),%ebx
  828. 
  829.     movw    %dx,%bx
  830.     movl    spanpackage_t_pz(%esi),%ecx
  831. 
  832.     negl    %eax
  833. 
  834.     movl    spanpackage_t_pdest(%esi),%edi
  835.     andl    $7,%eax        // 0->0, 1->7, 2->6, ... , 7->1
  836. 
  837.     subl    %eax,%edi    // compensate for hardwired offsets
  838.     subl    %eax,%ecx    // pz is backed up by 2*%eax bytes (two subtractions) because z entries are 16-bit
  839. 
  840.     subl    %eax,%ecx
  841.     movl    spanpackage_t_tfrac(%esi),%edx
  842. 
  843.     movw    spanpackage_t_light(%esi),%dx
  844.     movl    spanpackage_t_zi(%esi),%ebp
  845. 
  846.     rorl    $16,%ebp    // put high 16 bits of 1/z in low word
  847.     pushl    %esi        // save the span pointer; %esi becomes ptex until the popl after the draw loop
  848. 
  849.     movl    spanpackage_t_ptex(%esi),%esi
  850.     jmp        aff8entryvec_table(,%eax,4)    // enter the unrolled loop part-way in; entry 0 of the table (top of file) is LDraw8
  851. 
  852. // %bx = count of full and partial loops
  853. // %ebx high word = sfrac
  854. // %ecx = pz
  855. // %dx = light
  856. // %edx high word = tfrac
  857. // %esi = ptex
  858. // %edi = pdest
  859. // %ebp = 1/z
  860. // tstep low word = C(r_lstepx)
  861. // tstep high word = C(a_tstepxfrac)
  862. // C(a_sstepxfrac) low word = 0
  863. // C(a_sstepxfrac) high word = C(a_sstepxfrac)
  864. 
  865. LDrawLoop:
  866. 
  867. // FIXME: do we need to clamp light? We may need at least a buffer bit to
  868. // keep it from poking into tfrac and causing problems
  869. // The eight pixel stages below are identical apart from their pz/pdest offsets; only the first is annotated.
  870. LDraw8:
  871.     cmpw    (%ecx),%bp            // skip the pixel when the span's 1/z word is below the stored z value (signed compare)
  872.     jl        Lp1
  873.     xorl    %eax,%eax
  874.     movb    %dh,%ah                // %eax = (high byte of light << 8) | texel
  875.     movb    (%esi),%al
  876.     movw    %bp,(%ecx)
  877.     movb    0x12345678(%eax),%al    // placeholder displacement; D_Aff8Patch stores the colormap address at LPatch8-4
  878. LPatch8:
  879.     movb    %al,(%edi)
  880. Lp1:
  881.     addl    tstep,%edx            // step tfrac (high word) and light (low word) together
  882.     sbbl    %eax,%eax            // %eax = -1 if that add carried out, else 0
  883.     addl    lzistepx,%ebp
  884.     adcl    $0,%ebp                // end-around carry, since both 1/z and its step are rotated by 16
  885.     addl    C(a_sstepxfrac),%ebx
  886.     adcl    advancetable+4(,%eax,4),%esi    // ptex += advancetable[1] (base) or advancetable[0] (extra) per %eax, plus the sfrac carry
  887. 
  888. LDraw7:
  889.     cmpw    2(%ecx),%bp
  890.     jl        Lp2
  891.     xorl    %eax,%eax
  892.     movb    %dh,%ah
  893.     movb    (%esi),%al
  894.     movw    %bp,2(%ecx)
  895.     movb    0x12345678(%eax),%al
  896. LPatch7:
  897.     movb    %al,1(%edi)
  898. Lp2:
  899.     addl    tstep,%edx
  900.     sbbl    %eax,%eax
  901.     addl    lzistepx,%ebp
  902.     adcl    $0,%ebp
  903.     addl    C(a_sstepxfrac),%ebx
  904.     adcl    advancetable+4(,%eax,4),%esi
  905. 
  906. LDraw6:
  907.     cmpw    4(%ecx),%bp
  908.     jl        Lp3
  909.     xorl    %eax,%eax
  910.     movb    %dh,%ah
  911.     movb    (%esi),%al
  912.     movw    %bp,4(%ecx)
  913.     movb    0x12345678(%eax),%al
  914. LPatch6:
  915.     movb    %al,2(%edi)
  916. Lp3:
  917.     addl    tstep,%edx
  918.     sbbl    %eax,%eax
  919.     addl    lzistepx,%ebp
  920.     adcl    $0,%ebp
  921.     addl    C(a_sstepxfrac),%ebx
  922.     adcl    advancetable+4(,%eax,4),%esi
  923. 
  924. LDraw5:
  925.     cmpw    6(%ecx),%bp
  926.     jl        Lp4
  927.     xorl    %eax,%eax
  928.     movb    %dh,%ah
  929.     movb    (%esi),%al
  930.     movw    %bp,6(%ecx)
  931.     movb    0x12345678(%eax),%al
  932. LPatch5:
  933.     movb    %al,3(%edi)
  934. Lp4:
  935.     addl    tstep,%edx
  936.     sbbl    %eax,%eax
  937.     addl    lzistepx,%ebp
  938.     adcl    $0,%ebp
  939.     addl    C(a_sstepxfrac),%ebx
  940.     adcl    advancetable+4(,%eax,4),%esi
  941. 
  942. LDraw4:
  943.     cmpw    8(%ecx),%bp
  944.     jl        Lp5
  945.     xorl    %eax,%eax
  946.     movb    %dh,%ah
  947.     movb    (%esi),%al
  948.     movw    %bp,8(%ecx)
  949.     movb    0x12345678(%eax),%al
  950. LPatch4:
  951.     movb    %al,4(%edi)
  952. Lp5:
  953.     addl    tstep,%edx
  954.     sbbl    %eax,%eax
  955.     addl    lzistepx,%ebp
  956.     adcl    $0,%ebp
  957.     addl    C(a_sstepxfrac),%ebx
  958.     adcl    advancetable+4(,%eax,4),%esi
  959. 
  960. LDraw3:
  961.     cmpw    10(%ecx),%bp
  962.     jl        Lp6
  963.     xorl    %eax,%eax
  964.     movb    %dh,%ah
  965.     movb    (%esi),%al
  966.     movw    %bp,10(%ecx)
  967.     movb    0x12345678(%eax),%al
  968. LPatch3:
  969.     movb    %al,5(%edi)
  970. Lp6:
  971.     addl    tstep,%edx
  972.     sbbl    %eax,%eax
  973.     addl    lzistepx,%ebp
  974.     adcl    $0,%ebp
  975.     addl    C(a_sstepxfrac),%ebx
  976.     adcl    advancetable+4(,%eax,4),%esi
  977. 
  978. LDraw2:
  979.     cmpw    12(%ecx),%bp
  980.     jl        Lp7
  981.     xorl    %eax,%eax
  982.     movb    %dh,%ah
  983.     movb    (%esi),%al
  984.     movw    %bp,12(%ecx)
  985.     movb    0x12345678(%eax),%al
  986. LPatch2:
  987.     movb    %al,6(%edi)
  988. Lp7:
  989.     addl    tstep,%edx
  990.     sbbl    %eax,%eax
  991.     addl    lzistepx,%ebp
  992.     adcl    $0,%ebp
  993.     addl    C(a_sstepxfrac),%ebx
  994.     adcl    advancetable+4(,%eax,4),%esi
  995. 
  996. LDraw1:
  997.     cmpw    14(%ecx),%bp
  998.     jl        Lp8
  999.     xorl    %eax,%eax
  1000.     movb    %dh,%ah
  1001.     movb    (%esi),%al
  1002.     movw    %bp,14(%ecx)
  1003.     movb    0x12345678(%eax),%al
  1004. LPatch1:
  1005.     movb    %al,7(%edi)
  1006. Lp8:
  1007.     addl    tstep,%edx
  1008.     sbbl    %eax,%eax
  1009.     addl    lzistepx,%ebp
  1010.     adcl    $0,%ebp
  1011.     addl    C(a_sstepxfrac),%ebx
  1012.     adcl    advancetable+4(,%eax,4),%esi
  1013. 
  1014.     addl    $8,%edi                // advance pdest by the 8 pixels just handled
  1015.     addl    $16,%ecx            // and pz by 8 sixteen-bit entries
  1016. 
  1017.     decw    %bx                    // 16-bit decrement so the sfrac in the high word of %ebx is untouched
  1018.     jnz        LDrawLoop
  1019. 
  1020.     popl    %esi                // restore spans pointer
  1021. LNextSpan:
  1022.     addl    $(spanpackage_t_size),%esi    // point to next span
  1023. LNextSpanESISet:
  1024.     movl    spanpackage_t_count(%esi),%edx
  1025.     cmpl    $-999999,%edx        // any more spans?
  1026.     jnz        LSpanLoop            // yes
  1027. 
  1028.     popl    %edi
  1029.     popl    %ebp                // restore the caller's stack frame
  1030.     popl    %ebx                // restore register variables
  1031.     popl    %esi
  1032.     ret
  1033. 
  1034. 
  1035. // draw a one-long span
  1036. // (no stepping state is needed; %esi is still the span pointer on this path)
  1037. LExactlyOneLong:
  1038. 
  1039.     movl    spanpackage_t_pz(%esi),%ecx
  1040.     movl    spanpackage_t_zi(%esi),%ebp
  1041. 
  1042.     rorl    $16,%ebp    // put high 16 bits of 1/z in low word
  1043.     movl    spanpackage_t_ptex(%esi),%ebx
  1044. 
  1045.     cmpw    (%ecx),%bp
  1046.     jl        LNextSpan            // depth test failed: %esi not advanced yet, so let LNextSpan do it
  1047.     xorl    %eax,%eax
  1048.     movl    spanpackage_t_pdest(%esi),%edi
  1049.     movb    spanpackage_t_light+1(%esi),%ah    // high byte of the light word, same as %dh in the main loop
  1050.     addl    $(spanpackage_t_size),%esi    // point to next span
  1051.     movb    (%ebx),%al
  1052.     movw    %bp,(%ecx)
  1053.     movb    0x12345678(%eax),%al    // patched through LPatch9-4 like the loop sites
  1054. LPatch9:
  1055.     movb    %al,(%edi)
  1056. 
  1057.     jmp        LNextSpanESISet        // %esi already points at the next span
  1058. 
  1059. .globl C(D_PolysetAff8End)
  1060. C(D_PolysetAff8End):
  1061.  
  1062.  
  1063. #define pcolormap        4        // first stack argument (nothing is pushed before it is read)
  1064. 
  1065. .globl C(D_Aff8Patch)            // stores the colormap address into the nine 0x12345678 placeholders of the span drawer
  1066. C(D_Aff8Patch):
  1067.     movl    pcolormap(%esp),%eax
  1068.     movl    %eax,LPatch1-4        // each LPatchN label directly follows a movb 0x12345678(%eax),%al, so LPatchN-4 is that 32-bit displacement
  1069.     movl    %eax,LPatch2-4
  1070.     movl    %eax,LPatch3-4
  1071.     movl    %eax,LPatch4-4
  1072.     movl    %eax,LPatch5-4
  1073.     movl    %eax,LPatch6-4
  1074.     movl    %eax,LPatch7-4
  1075.     movl    %eax,LPatch8-4
  1076.     movl    %eax,LPatch9-4        // the one-long-span path
  1077. // NOTE(review): these stores write into the instruction stream, so the code must be writable when this runs - confirm for the target platform
  1078.     ret
  1079.  
  1080.  
  1081. //----------------------------------------------------------------------
  1082. // Alias model polygon dispatching code, combined with subdivided affine
  1083. // triangle drawing code
  1084. //----------------------------------------------------------------------
  1085.  
  1086. .globl C(D_PolysetDraw)        // reserves the span buffer on the stack, then either jumps to D_DrawNonSubdiv or runs the subdivision loop below
  1087. C(D_PolysetDraw):
  1088. 
  1089. //    spanpackage_t    spans[DPS_MAXSPANS + 1 +
  1090. //            ((CACHE_SIZE - 1) / sizeof(spanpackage_t)) + 1];
  1091. //                        // one extra because of cache line pretouching
  1092. //
  1093. //    a_spans = (spanpackage_t *)
  1094. //            (((long)&spans[0] + CACHE_SIZE - 1) & ~(CACHE_SIZE - 1));
  1095.     subl    $(SPAN_SIZE),%esp
  1096.     movl    %esp,%eax
  1097.     addl    $(CACHE_SIZE - 1),%eax
  1098.     andl    $(~(CACHE_SIZE - 1)),%eax
  1099.     movl    %eax,C(a_spans)
  1100. 
  1101. //    if (r_affinetridesc.drawtype)
  1102. //        D_DrawSubdiv ();
  1103. //    else
  1104. //        D_DrawNonSubdiv ();
  1105.     movl    C(r_affinetridesc)+atd_drawtype,%eax
  1106.     testl    %eax,%eax
  1107.     jz        C(D_DrawNonSubdiv)    // jump, not call: the span buffer is still on the stack (that routine's header notes it is not C-callable because of this cleanup)
  1108. 
  1109.     pushl    %ebp                // preserve caller stack frame pointer
  1110. 
  1111. //    lnumtriangles = r_affinetridesc.numtriangles;
  1112.     movl    C(r_affinetridesc)+atd_numtriangles,%ebp
  1113. 
  1114.     pushl    %esi                // preserve register variables
  1115.     shll    $4,%ebp                // %ebp = byte offset just past the last triangle; NOTE(review): hard-codes a 16-byte triangle stride, whereas D_DrawNonSubdiv uses mtri_shift - confirm they agree
  1116. 
  1117.     pushl    %ebx
  1118. //    ptri = r_affinetridesc.ptriangles;
  1119.     movl    C(r_affinetridesc)+atd_ptriangles,%ebx
  1120. 
  1121.     pushl    %edi
  1122. 
  1123. //    mtriangle_t        *ptri;
  1124. //    finalvert_t        *pfv, *index0, *index1, *index2;
  1125. //    int                i;
  1126. //    int                lnumtriangles;
  1127. //    int                s0, s1, s2;
  1128. 
  1129. //    pfv = r_affinetridesc.pfinalverts;
  1130.     movl    C(r_affinetridesc)+atd_pfinalverts,%edi
  1131. 
  1132. //    for (i=0 ; i<lnumtriangles ; i++)
  1133. //    {
  1134. // NOTE: unlike the C loop this is bottom-tested and walks from the last triangle down, so it expects numtriangles > 0 on entry
  1135. Llooptop:
  1136. 
  1137. //        index0 = pfv + ptri[i].vertindex[0];
  1138. //        index1 = pfv + ptri[i].vertindex[1];
  1139. //        index2 = pfv + ptri[i].vertindex[2];
  1140.     movl    mtri_vertindex-16+0(%ebx,%ebp,),%ecx
  1141.     movl    mtri_vertindex-16+4(%ebx,%ebp,),%esi
  1142. 
  1143.     shll    $(fv_shift),%ecx
  1144.     movl    mtri_vertindex-16+8(%ebx,%ebp,),%edx
  1145. 
  1146.     shll    $(fv_shift),%esi
  1147.     addl    %edi,%ecx            // %ecx = index0
  1148. 
  1149.     shll    $(fv_shift),%edx
  1150.     addl    %edi,%esi            // %esi = index1
  1151. 
  1152.     addl    %edi,%edx            // %edx = index2
  1153. 
  1154. //        if (((index0->v[1]-index1->v[1]) *
  1155. //                (index0->v[0]-index2->v[0]) -
  1156. //                (index0->v[0]-index1->v[0])*(index0->v[1]-index2->v[1])) >= 0)
  1157. //        {
  1158. //            continue;
  1159. //        }
  1160. //
  1161. //        d_pcolormap = &((byte *)acolormap)[index0->v[4] & 0xFF00];
  1162.     fildl    fv_v+4(%ecx)    // i0v1
  1163.     fildl    fv_v+4(%esi)    // i1v1 | i0v1
  1164.     fildl    fv_v+0(%ecx)    // i0v0 | i1v1 | i0v1
  1165.     fildl    fv_v+0(%edx)    // i2v0 | i0v0 | i1v1 | i0v1
  1166.     fxch    %st(2)            // i1v1 | i0v0 | i2v0 | i0v1
  1167.     fsubr    %st(3),%st(0)    // i0v1-i1v1 | i0v0 | i2v0 | i0v1
  1168.     fildl    fv_v+0(%esi)    // i1v0 | i0v1-i1v1 | i0v0 | i2v0 | i0v1
  1169.     fxch    %st(2)            // i0v0 | i0v1-i1v1 | i1v0 | i2v0 | i0v1
  1170.     fsub    %st(0),%st(3)    // i0v0 | i0v1-i1v1 | i1v0 | i0v0-i2v0 | i0v1
  1171.     fildl    fv_v+4(%edx)    // i2v1 | i0v0 | i0v1-i1v1 | i1v0 | i0v0-i2v0| i0v1
  1172.     fxch    %st(1)            // i0v0 | i2v1 | i0v1-i1v1 | i1v0 | i0v0-i2v0| i0v1
  1173.     fsubp    %st(0),%st(3)    // i2v1 | i0v1-i1v1 | i0v0-i1v0 | i0v0-i2v0 | i0v1
  1174.     fxch    %st(1)            // i0v1-i1v1 | i2v1 | i0v0-i1v0 | i0v0-i2v0 | i0v1
  1175.     fmulp    %st(0),%st(3)    // i2v1 | i0v0-i1v0 | i0v1-i1v1*i0v0-i2v0 | i0v1
  1176.     fsubrp    %st(0),%st(3)    // i0v0-i1v0 | i0v1-i1v1*i0v0-i2v0 | i0v1-i2v1
  1177.     movl    fv_v+16(%ecx),%eax
  1178.     andl    $0xFF00,%eax
  1179.     fmulp    %st(0),%st(2)    // i0v1-i1v1*i0v0-i2v0 | i0v0-i1v0*i0v1-i2v1
  1180.     addl    C(acolormap),%eax
  1181.     fsubp    %st(0),%st(1)    // (i0v1-i1v1)*(i0v0-i2v0)-(i0v0-i1v0)*(i0v1-i2v1)
  1182.     movl    %eax,C(d_pcolormap)
  1183.     fstps    Ltemp
  1184.     movl    Ltemp,%eax            // examine the float's raw bits as an integer
  1185.     subl    $0x80000001,%eax    // borrows (CF set) unless the bits are >= 0x80000001, i.e. unless the value is negative and not -0.0
  1186.     jc        Lskip                // value >= 0: skip this triangle
  1187. 
  1188. //        if (ptri[i].facesfront)
  1189. //        {
  1190. //            D_PolysetRecursiveTriangle(index0->v, index1->v, index2->v);
  1191.     movl    mtri_facesfront-16(%ebx,%ebp,),%eax
  1192.     testl    %eax,%eax
  1193.     jz        Lfacesback
  1194. 
  1195.     pushl    %edx
  1196.     pushl    %esi
  1197.     pushl    %ecx
  1198.     call    C(D_PolysetRecursiveTriangle)    // callee removes the three arguments itself (it ends in ret $12)
  1199. 
  1200.     subl    $16,%ebp
  1201.     jnz        Llooptop
  1202.     jmp        Ldone2
  1203. 
  1204. //        }
  1205. //        else
  1206. //        {
  1207. Lfacesback:
  1208. 
  1209. //            s0 = index0->v[2];
  1210. //            s1 = index1->v[2];
  1211. //            s2 = index2->v[2];
  1212.     movl    fv_v+8(%ecx),%eax
  1213.     pushl    %eax
  1214.     movl    fv_v+8(%esi),%eax
  1215.     pushl    %eax
  1216.     movl    fv_v+8(%edx),%eax
  1217.     pushl    %eax
  1218.     pushl    %ecx                // index0 and index2 are saved because the callee overwrites %ecx and %edx
  1219.     pushl    %edx
  1220. 
  1221. //            if (index0->flags & ALIAS_ONSEAM)
  1222. //                index0->v[2] += r_affinetridesc.seamfixupX16;
  1223.     movl    C(r_affinetridesc)+atd_seamfixupX16,%eax
  1224.     testl    $(ALIAS_ONSEAM),fv_flags(%ecx)
  1225.     jz        Lp11
  1226.     addl    %eax,fv_v+8(%ecx)
  1227. Lp11:
  1228. 
  1229. //            if (index1->flags & ALIAS_ONSEAM)
  1230. //                index1->v[2] += r_affinetridesc.seamfixupX16;
  1231.     testl    $(ALIAS_ONSEAM),fv_flags(%esi)
  1232.     jz        Lp12
  1233.     addl    %eax,fv_v+8(%esi)
  1234. Lp12:
  1235. 
  1236. //            if (index2->flags & ALIAS_ONSEAM)
  1237. //                index2->v[2] += r_affinetridesc.seamfixupX16;
  1238.     testl    $(ALIAS_ONSEAM),fv_flags(%edx)
  1239.     jz        Lp13
  1240.     addl    %eax,fv_v+8(%edx)
  1241. Lp13:
  1242. 
  1243. //            D_PolysetRecursiveTriangle(index0->v, index1->v, index2->v);
  1244.     pushl    %edx
  1245.     pushl    %esi
  1246.     pushl    %ecx
  1247.     call    C(D_PolysetRecursiveTriangle)
  1248. 
  1249. //            index0->v[2] = s0;
  1250. //            index1->v[2] = s1;
  1251. //            index2->v[2] = s2;
  1252.     popl    %edx                // pops mirror the five pushes above: index2, index0, then s2, s1, s0
  1253.     popl    %ecx
  1254.     popl    %eax
  1255.     movl    %eax,fv_v+8(%edx)
  1256.     popl    %eax
  1257.     movl    %eax,fv_v+8(%esi)    // %esi (index1) was preserved across the call by the callee
  1258.     popl    %eax
  1259.     movl    %eax,fv_v+8(%ecx)
  1260. 
  1261. //        }
  1262. //    }
  1263. Lskip:
  1264.     subl    $16,%ebp            // step back one triangle; the loop ends when the offset reaches 0
  1265.     jnz        Llooptop
  1266. 
  1267. Ldone2:
  1268.     popl    %edi                // restore register variables
  1269.     popl    %ebx
  1270.     popl    %esi
  1271.     popl    %ebp                // restore the caller's stack frame
  1272. 
  1273.     addl    $(SPAN_SIZE),%esp    // release the span buffer reserved on entry
  1274. 
  1275.     ret
  1276.  
  1277.  
  1278. //----------------------------------------------------------------------
  1279. // Alias model triangle left-edge scanning code
  1280. //----------------------------------------------------------------------
  1281.  
  1282. #define height    4+16            // offset of the single stack argument after the four pushes below
  1283. 
  1284. .globl C(D_PolysetScanLeftEdge)    // writes one span package per scanline for `height` lines, stepping the left-edge state after each
  1285. C(D_PolysetScanLeftEdge):
  1286.     pushl    %ebp                // preserve caller stack frame pointer
  1287.     pushl    %esi                // preserve register variables
  1288.     pushl    %edi
  1289.     pushl    %ebx
  1290. 
  1291.     movl    height(%esp),%eax
  1292.     movl    C(d_sfrac),%ecx
  1293.     andl    $0xFFFF,%eax
  1294.     movl    C(d_ptex),%ebx
  1295.     orl        %eax,%ecx            // pack the line count into the low word; presumably d_sfrac's low word is zero here - TODO confirm against the caller
  1296.     movl    C(d_pedgespanpackage),%esi
  1297.     movl    C(d_tfrac),%edx
  1298.     movl    C(d_light),%edi
  1299.     movl    C(d_zi),%ebp
  1300. 
  1301. // %eax: scratch
  1302. // %ebx: d_ptex
  1303. // %ecx: d_sfrac in high word, count in low word
  1304. // %edx: d_tfrac
  1305. // %esi: d_pedgespanpackage, errorterm, scratch alternately
  1306. // %edi: d_light
  1307. // %ebp: d_zi
  1308. 
  1309. //    do
  1310. //    {
  1311. 
  1312. LScanLoop:
  1313. 
  1314. //        d_pedgespanpackage->ptex = ptex;
  1315. //        d_pedgespanpackage->pdest = d_pdest;
  1316. //        d_pedgespanpackage->pz = d_pz;
  1317. //        d_pedgespanpackage->count = d_aspancount;
  1318. //        d_pedgespanpackage->light = d_light;
  1319. //        d_pedgespanpackage->zi = d_zi;
  1320. //        d_pedgespanpackage->sfrac = d_sfrac << 16;
  1321. //        d_pedgespanpackage->tfrac = d_tfrac << 16;
  1322.     movl    %ebx,spanpackage_t_ptex(%esi)
  1323.     movl    C(d_pdest),%eax
  1324.     movl    %eax,spanpackage_t_pdest(%esi)
  1325.     movl    C(d_pz),%eax
  1326.     movl    %eax,spanpackage_t_pz(%esi)
  1327.     movl    C(d_aspancount),%eax
  1328.     movl    %eax,spanpackage_t_count(%esi)
  1329.     movl    %edi,spanpackage_t_light(%esi)
  1330.     movl    %ebp,spanpackage_t_zi(%esi)
  1331.     movl    %ecx,spanpackage_t_sfrac(%esi)    // stored as held in %ecx, so the low word still contains the remaining line count
  1332.     movl    %edx,spanpackage_t_tfrac(%esi)
  1333. 
  1334. // pretouch the next cache line
  1335.     movb    spanpackage_t_size(%esi),%al
  1336. 
  1337. //        d_pedgespanpackage++;
  1338.     addl    $(spanpackage_t_size),%esi
  1339.     movl    C(erroradjustup),%eax
  1340.     movl    %esi,C(d_pedgespanpackage)
  1341. 
  1342. //        errorterm += erroradjustup;
  1343.     movl    C(errorterm),%esi
  1344.     addl    %eax,%esi
  1345.     movl    C(d_pdest),%eax        // movl keeps the sign flag from the addl above for the js below
  1346. 
  1347. //        if (errorterm >= 0)
  1348. //        {
  1349.     js        LNoLeftEdgeTurnover
  1350. 
  1351. //            errorterm -= erroradjustdown;
  1352. //            d_pdest += d_pdestextrastep;
  1353.     subl    C(erroradjustdown),%esi
  1354.     addl    C(d_pdestextrastep),%eax
  1355.     movl    %esi,C(errorterm)
  1356.     movl    %eax,C(d_pdest)
  1357. 
  1358. //            d_pz += d_pzextrastep;
  1359. //            d_aspancount += d_countextrastep;
  1360. //            d_ptex += d_ptexextrastep;
  1361. //            d_sfrac += d_sfracextrastep;
  1362. //            d_ptex += d_sfrac >> 16;
  1363. //            d_sfrac &= 0xFFFF;
  1364. //            d_tfrac += d_tfracextrastep;
  1365.     movl    C(d_pz),%eax
  1366.     movl    C(d_aspancount),%esi
  1367.     addl    C(d_pzextrastep),%eax
  1368.     addl    C(d_sfracextrastep),%ecx
  1369.     adcl    C(d_ptexextrastep),%ebx    // the carry out of sfrac supplies the "d_ptex += d_sfrac >> 16" part
  1370.     addl    C(d_countextrastep),%esi
  1371.     movl    %eax,C(d_pz)
  1372.     movl    C(d_tfracextrastep),%eax
  1373.     movl    %esi,C(d_aspancount)
  1374.     addl    %eax,%edx
  1375. 
  1376. //            if (d_tfrac & 0x10000)
  1377. //            {
  1378.     jnc        LSkip1                // the carry out of the tfrac add stands in for the C overflow-bit test
  1379. 
  1380. //                d_ptex += r_affinetridesc.skinwidth;
  1381. //                d_tfrac &= 0xFFFF;
  1382.     addl    C(r_affinetridesc)+atd_skinwidth,%ebx
  1383. 
  1384. //            }
  1385. 
  1386. LSkip1:
  1387. 
  1388. //            d_light += d_lightextrastep;
  1389. //            d_zi += d_ziextrastep;
  1390.     addl    C(d_lightextrastep),%edi
  1391.     addl    C(d_ziextrastep),%ebp
  1392. 
  1393. //        }
  1394.     movl    C(d_pedgespanpackage),%esi    // %esi was used for errorterm/count above; reload the span pointer
  1395.     decl    %ecx                // --height, kept in the low word of %ecx
  1396.     testl    $0xFFFF,%ecx
  1397.     jnz        LScanLoop
  1398. 
  1399.     popl    %ebx
  1400.     popl    %edi
  1401.     popl    %esi
  1402.     popl    %ebp
  1403.     ret
  1404. 
  1405. //        else
  1406. //        {
  1407. 
  1408. LNoLeftEdgeTurnover:
  1409.     movl    %esi,C(errorterm)
  1410. 
  1411. //            d_pdest += d_pdestbasestep;
  1412.     addl    C(d_pdestbasestep),%eax
  1413.     movl    %eax,C(d_pdest)
  1414. 
  1415. //            d_pz += d_pzbasestep;
  1416. //            d_aspancount += ubasestep;
  1417. //            d_ptex += d_ptexbasestep;
  1418. //            d_sfrac += d_sfracbasestep;
  1419. //            d_ptex += d_sfrac >> 16;
  1420. //            d_sfrac &= 0xFFFF;
  1421.     movl    C(d_pz),%eax
  1422.     movl    C(d_aspancount),%esi
  1423.     addl    C(d_pzbasestep),%eax
  1424.     addl    C(d_sfracbasestep),%ecx
  1425.     adcl    C(d_ptexbasestep),%ebx
  1426.     addl    C(ubasestep),%esi
  1427.     movl    %eax,C(d_pz)
  1428.     movl    %esi,C(d_aspancount)
  1429. 
  1430. //            d_tfrac += d_tfracbasestep;
  1431.     movl    C(d_tfracbasestep),%esi
  1432.     addl    %esi,%edx
  1433. 
  1434. //            if (d_tfrac & 0x10000)
  1435. //            {
  1436.     jnc        LSkip2
  1437. 
  1438. //                d_ptex += r_affinetridesc.skinwidth;
  1439. //                d_tfrac &= 0xFFFF;
  1440.     addl    C(r_affinetridesc)+atd_skinwidth,%ebx
  1441. 
  1442. //            }
  1443. 
  1444. LSkip2:
  1445. 
  1446. //            d_light += d_lightbasestep;
  1447. //            d_zi += d_zibasestep;
  1448.     addl    C(d_lightbasestep),%edi
  1449.     addl    C(d_zibasestep),%ebp
  1450. 
  1451. //        }
  1452. //    } while (--height);
  1453.     movl    C(d_pedgespanpackage),%esi
  1454.     decl    %ecx
  1455.     testl    $0xFFFF,%ecx
  1456.     jnz        LScanLoop
  1457. 
  1458.     popl    %ebx
  1459.     popl    %edi
  1460.     popl    %esi
  1461.     popl    %ebp
  1462.     ret
  1463.  
  1464.  
  1465. //----------------------------------------------------------------------
  1466. // Alias model vertex drawing code
  1467. //----------------------------------------------------------------------
  1468.  
  1469. #define fv            4+8        // argument offsets as seen after the first two pushes below
  1470. #define    numverts    8+8
  1471. 
  1472. .globl C(D_PolysetDrawFinalVerts)    // plots each of numverts vertices starting at fv as a single depth-tested, lit pixel
  1473. C(D_PolysetDrawFinalVerts):
  1474.     pushl    %ebp                // preserve caller stack frame pointer
  1475.     pushl    %ebx
  1476. 
  1477. //    int        i, z;
  1478. //    short    *zbuf;
  1479. 
  1480.     movl    numverts(%esp),%ecx
  1481.     movl    fv(%esp),%ebx
  1482. 
  1483.     pushl    %esi                // preserve register variables
  1484.     pushl    %edi
  1485. // NOTE: the loop below is bottom-tested (decl/jnz), so it expects numverts >= 1 on entry
  1486. LFVLoop:
  1487. 
  1488. //    for (i=0 ; i<numverts ; i++, fv++)
  1489. //    {
  1490. //    // valid triangle coordinates for filling can include the bottom and
  1491. //    // right clip edges, due to the fill rule; these shouldn't be drawn
  1492. //        if ((fv->v[0] < r_refdef.vrectright) &&
  1493. //            (fv->v[1] < r_refdef.vrectbottom))
  1494. //        {
  1495.     movl    fv_v+0(%ebx),%eax    // %eax = x, kept until the final pixel store
  1496.     movl    C(r_refdef)+rd_vrectright,%edx
  1497.     cmpl    %edx,%eax
  1498.     jge        LNextVert
  1499.     movl    fv_v+4(%ebx),%esi    // %esi = y
  1500.     movl    C(r_refdef)+rd_vrectbottom,%edx
  1501.     cmpl    %edx,%esi
  1502.     jge        LNextVert
  1503. 
  1504. //            zbuf = zspantable[fv->v[1]] + fv->v[0];
  1505.     movl    C(zspantable)(,%esi,4),%edi
  1506. 
  1507. //            z = fv->v[5]>>16;
  1508.     movl    fv_v+20(%ebx),%edx
  1509.     shrl    $16,%edx            // logical shift; only %dx is used below, so the filled-in high bits do not matter
  1510. 
  1511. //            if (z >= *zbuf)
  1512. //            {
  1513. //                int        pix;
  1514.     cmpw    (%edi,%eax,2),%dx
  1515.     jl        LNextVert
  1516. 
  1517. //                *zbuf = z;
  1518.     movw    %dx,(%edi,%eax,2)
  1519. 
  1520. //                pix = skintable[fv->v[3]>>16][fv->v[2]>>16];
  1521.     movl    fv_v+12(%ebx),%edi
  1522.     shrl    $16,%edi            // NOTE(review): logical shift, matches the C >> only for non-negative v[3]/v[2] - presumably guaranteed by the caller
  1523.     movl    C(skintable)(,%edi,4),%edi
  1524.     movl    fv_v+8(%ebx),%edx
  1525.     shrl    $16,%edx
  1526.     movb    (%edi,%edx),%dl
  1527. 
  1528. //                pix = ((byte *)acolormap)[pix + (fv->v[4] & 0xFF00)];
  1529.     movl    fv_v+16(%ebx),%edi
  1530.     andl    $0xFF00,%edi
  1531.     andl    $0x00FF,%edx        // keep only the texel byte just loaded into %dl
  1532.     addl    %edx,%edi
  1533.     movl    C(acolormap),%edx
  1534.     movb    (%edx,%edi,1),%dl
  1535. 
  1536. //                d_viewbuffer[d_scantable[fv->v[1]] + fv->v[0]] = pix;
  1537.     movl    C(d_scantable)(,%esi,4),%edi
  1538.     movl    C(d_viewbuffer),%esi
  1539.     addl    %eax,%edi
  1540.     movb    %dl,(%esi,%edi)
  1541. 
  1542. //            }
  1543. //        }
  1544. //    }
  1545. LNextVert:
  1546.     addl    $(fv_size),%ebx        // fv++
  1547.     decl    %ecx
  1548.     jnz        LFVLoop
  1549. 
  1550.     popl    %edi
  1551.     popl    %esi
  1552.     popl    %ebx
  1553.     popl    %ebp                // saved and restored only; %ebp is not otherwise used in this routine
  1554.     ret
  1555.  
  1556.  
  1557. //----------------------------------------------------------------------
  1558. // Alias model non-subdivided polygon dispatching code
  1559. // Sets up r_p0/r_p1/r_p2 for each triangle in r_affinetridesc whose d_xdenom
  1560. // is negative, then calls the edge-table and rasterizer routines for it.
  1561. // not C-callable because of stack buffer cleanup
  1562. //----------------------------------------------------------------------
  1563. .globl C(D_DrawNonSubdiv)
  1564. C(D_DrawNonSubdiv):
  1565.     pushl    %ebp                // preserve caller stack frame pointer
  1566.     movl    C(r_affinetridesc)+atd_numtriangles,%ebp
  1567.     pushl    %ebx
  1568.     shll    $(mtri_shift),%ebp    // triangle count -> byte offset into ptriangles
  1569.     pushl    %esi                // preserve register variables
  1570.     movl    C(r_affinetridesc)+atd_ptriangles,%esi
  1571.     pushl    %edi
  1572. // %ebp steps down to 0 below, so triangles are visited last to first
  1573. //    mtriangle_t        *ptri;
  1574. //    finalvert_t        *pfv, *index0, *index1, *index2;
  1575. //    int                i;
  1576. //    int                lnumtriangles;
  1577.  
  1578. //    pfv = r_affinetridesc.pfinalverts;
  1579. //    ptri = r_affinetridesc.ptriangles;
  1580. //    lnumtriangles = r_affinetridesc.numtriangles;
  1581.     testl    %ebp,%ebp            // the loop is bottom-tested, so an empty
  1582.     jz        LNDDone                // triangle list has to be skipped here
  1583. LNDLoop:
  1584. //    for (i=0 ; i<lnumtriangles ; i++, ptri++)
  1585. //    {
  1586. //        index0 = pfv + ptri->vertindex[0];
  1587. //        index1 = pfv + ptri->vertindex[1];
  1588. //        index2 = pfv + ptri->vertindex[2];
  1589.     movl    C(r_affinetridesc)+atd_pfinalverts,%edi
  1590.     movl    mtri_vertindex+0-mtri_size(%esi,%ebp,1),%ecx
  1591.     shll    $(fv_shift),%ecx    // vertex index -> byte offset
  1592.     movl    mtri_vertindex+4-mtri_size(%esi,%ebp,1),%edx
  1593.     shll    $(fv_shift),%edx
  1594.     movl    mtri_vertindex+8-mtri_size(%esi,%ebp,1),%ebx
  1595.     shll    $(fv_shift),%ebx
  1596.     addl    %edi,%ecx            // ecx = index0
  1597.     addl    %edi,%edx            // edx = index1
  1598.     addl    %edi,%ebx            // ebx = index2
  1599.  
  1600. //        d_xdenom = (index0->v[1]-index1->v[1]) *
  1601. //                (index0->v[0]-index2->v[0]) -
  1602. //                (index0->v[0]-index1->v[0])*(index0->v[1]-index2->v[1]);
  1603.     movl    fv_v+4(%ecx),%eax
  1604.     movl    fv_v+0(%ecx),%esi
  1605.     subl    fv_v+4(%edx),%eax
  1606.     subl    fv_v+0(%ebx),%esi
  1607.     imull    %esi,%eax            // eax = first product
  1608.     movl    fv_v+0(%ecx),%esi
  1609.     movl    fv_v+4(%ecx),%edi
  1610.     subl    fv_v+0(%edx),%esi
  1611.     subl    fv_v+4(%ebx),%edi
  1612.     imull    %esi,%edi            // edi = second product
  1613.     subl    %edi,%eax            // eax = d_xdenom; flags feed the jns below
  1614.  
  1615. //        if (d_xdenom >= 0)
  1616. //        {
  1617. //            continue;
  1618.     jns        LNextTri            // sign clear: d_xdenom >= 0
  1619.  
  1620. //        }
  1621. // push the integer d_xdenom onto the FPU stack (fildl needs a memory operand)
  1622.     movl    %eax,C(d_xdenom)
  1623.     fildl    C(d_xdenom)
  1624.  
  1625. //        r_p0[0] = index0->v[0];        // u
  1626. //        r_p0[1] = index0->v[1];        // v
  1627. //        r_p0[2] = index0->v[2];        // s
  1628. //        r_p0[3] = index0->v[3];        // t
  1629. //        r_p0[4] = index0->v[4];        // light
  1630. //        r_p0[5] = index0->v[5];        // iz
  1631.     movl    fv_v+0(%ecx),%eax
  1632.     movl    fv_v+4(%ecx),%esi
  1633.     movl    %eax,C(r_p0)+0
  1634.     movl    %esi,C(r_p0)+4
  1635.     movl    fv_v+8(%ecx),%eax
  1636.     movl    fv_v+12(%ecx),%esi
  1637.     movl    %eax,C(r_p0)+8
  1638.     movl    %esi,C(r_p0)+12
  1639.     movl    fv_v+16(%ecx),%eax
  1640.     movl    fv_v+20(%ecx),%esi
  1641.     movl    %eax,C(r_p0)+16
  1642.     movl    %esi,C(r_p0)+20
  1643. // st(0) = float_1 / d_xdenom (float_1 is defined outside this view; presumably 1.0)
  1644.     fdivrs    float_1
  1645. // NOTE(review): the divide is presumably issued here to overlap the copies below
  1646. //        r_p1[0] = index1->v[0];
  1647. //        r_p1[1] = index1->v[1];
  1648. //        r_p1[2] = index1->v[2];
  1649. //        r_p1[3] = index1->v[3];
  1650. //        r_p1[4] = index1->v[4];
  1651. //        r_p1[5] = index1->v[5];
  1652.     movl    fv_v+0(%edx),%eax
  1653.     movl    fv_v+4(%edx),%esi
  1654.     movl    %eax,C(r_p1)+0
  1655.     movl    %esi,C(r_p1)+4
  1656.     movl    fv_v+8(%edx),%eax
  1657.     movl    fv_v+12(%edx),%esi
  1658.     movl    %eax,C(r_p1)+8
  1659.     movl    %esi,C(r_p1)+12
  1660.     movl    fv_v+16(%edx),%eax
  1661.     movl    fv_v+20(%edx),%esi
  1662.     movl    %eax,C(r_p1)+16
  1663.     movl    %esi,C(r_p1)+20
  1664.  
  1665. //        r_p2[0] = index2->v[0];
  1666. //        r_p2[1] = index2->v[1];
  1667. //        r_p2[2] = index2->v[2];
  1668. //        r_p2[3] = index2->v[3];
  1669. //        r_p2[4] = index2->v[4];
  1670. //        r_p2[5] = index2->v[5];
  1671.     movl    fv_v+0(%ebx),%eax
  1672.     movl    fv_v+4(%ebx),%esi
  1673.     movl    %eax,C(r_p2)+0
  1674.     movl    %esi,C(r_p2)+4
  1675.     movl    fv_v+8(%ebx),%eax
  1676.     movl    fv_v+12(%ebx),%esi
  1677.     movl    %eax,C(r_p2)+8
  1678.     movl    %esi,C(r_p2)+12
  1679.     movl    fv_v+16(%ebx),%eax
  1680.     movl    fv_v+20(%ebx),%esi
  1681.     movl    %eax,C(r_p2)+16
  1682.     movl    C(r_affinetridesc)+atd_ptriangles,%edi    // esi was reused as scratch
  1683.     movl    %esi,C(r_p2)+20
  1684.     movl    mtri_facesfront-mtri_size(%edi,%ebp,1),%eax
  1685.  
  1686. //        if (!ptri->facesfront)
  1687. //        {
  1688.     testl    %eax,%eax
  1689.     jnz        LFacesFront
  1690.  
  1691. //            if (index0->flags & ALIAS_ONSEAM)
  1692. //                r_p0[2] += r_affinetridesc.seamfixupX16;
  1693.     movl    fv_flags(%ecx),%eax
  1694.     movl    fv_flags(%edx),%esi
  1695.     movl    fv_flags(%ebx),%edi
  1696.     testl    $(ALIAS_ONSEAM),%eax
  1697.     movl    C(r_affinetridesc)+atd_seamfixupX16,%eax    // movl keeps testl's flags
  1698.     jz        LOnseamDone0
  1699.     addl    %eax,C(r_p0)+8
  1700. LOnseamDone0:
  1701.  
  1702. //            if (index1->flags & ALIAS_ONSEAM)
  1703. //                 r_p1[2] += r_affinetridesc.seamfixupX16;
  1704.     testl    $(ALIAS_ONSEAM),%esi
  1705.     jz        LOnseamDone1
  1706.     addl    %eax,C(r_p1)+8
  1707. LOnseamDone1:
  1708.  
  1709. //            if (index2->flags & ALIAS_ONSEAM)
  1710. //                r_p2[2] += r_affinetridesc.seamfixupX16;
  1711.     testl    $(ALIAS_ONSEAM),%edi
  1712.     jz        LOnseamDone2
  1713.     addl    %eax,C(r_p2)+8
  1714. LOnseamDone2:
  1715.  
  1716. //        }
  1717.  
  1718. LFacesFront:
  1719. // pop the fdivrs quotient into d_xdenom, now as a single-precision float
  1720.     fstps    C(d_xdenom)
  1721.  
  1722. //        D_PolysetSetEdgeTable ();
  1723. //        D_RasterizeAliasPolySmooth ();
  1724.         call    C(D_PolysetSetEdgeTable)
  1725.         call    C(D_RasterizeAliasPolySmooth)
  1726. // %ebp (the loop offset) is relied on to survive both calls
  1727. LNextTri:
  1728.         movl    C(r_affinetridesc)+atd_ptriangles,%esi
  1729.         subl    $(mtri_size),%ebp    // step back one triangle; 0 means all done
  1730.         jnz        LNDLoop
  1731. //    }
  1732. LNDDone:
  1733.     popl    %edi
  1734.     popl    %esi
  1735.     popl    %ebx
  1736.     popl    %ebp
  1737. // discard the SPAN_SIZE bytes sitting between the saved registers and the return address
  1738.     addl    $(SPAN_SIZE),%esp
  1739.  
  1740.     ret
  1741.  
  1742.  
  1743. #endif    // id386
  1744.  
  1745.